Distribuição Normal e K-means

# Toy k-means example: 15 simulated points in three groups of five.
# The seed is fixed so the draws (and the k-means start) are reproducible.
set.seed(1909)

# Group means are written out per group: x is centred on 1, 2, 3 and
# y on 1, 2, 1 (five points each), both with standard deviation 0.2.
x <- rnorm(15, mean = rep(c(1, 2, 3), each = 5), sd = 0.2)
y <- rnorm(15, mean = rep(c(1, 2, 1), each = 5), sd = 0.2)

# Raw points in blue, each labelled with its index slightly up and to the
# right of the marker.
plot(x, y, col = "blue", pch = 19, cex = 1)
text(x + 0.05, y + 0.05, labels = as.character(seq_along(x)))

# Cluster the points into `nclusters` groups.
df <- data.frame(x = x, y = y)
nclusters <- 3
modelo.kmean <- kmeans(df, centers = nclusters)

# Redraw the points coloured by assigned cluster, then overlay the fitted
# cluster centres as crosses (pch = 3) in the matching colours.
plot(x, y, col = modelo.kmean$cluster, pch = 19, cex = 2)
points(
  modelo.kmean$centers,
  col = seq_len(nclusters),
  pch = 3,
  cex = 3,
  lwd = 2
)

Importando o dataset FIFA

# Read the FIFA player table from a CSV file into fifa_game_2.
# NOTE(review): the path is absolute and points into one user's Windows
# profile, so this line only works on that machine as written.
fifa_game_2 <- read.csv(
  file = "C:/Users/regis.spindola/Documents/Projetos/MBA/fifa game-2.csv"
)

O dataset possui 17994 observações. Filtrar para um dataset menor, usando o dplyr, que contemple:

* Atributos name, dribbling, acceleration

* Apenas os 602 jogadores da liga “Spanish Primera División”

library("dplyr")
## Warning: package 'dplyr' was built under R version 3.4.4
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("plotly")
## Warning: package 'plotly' was built under R version 3.4.4
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.4.4
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Keep only the rows whose league is "Spanish Primera División" and only
# the name, dribbling and acceleration columns; store them in liga_spanish.
liga_spanish <- fifa_game_2 %>%
  filter(league == "Spanish Primera División") %>%
  select(name, dribbling, acceleration)
# Output captured when the notebook was rendered:
# Warning: package 'bindrcpp' was built under R version 3.4.4

# Scatter plot with dribbling on the x axis and acceleration on the y axis.
plot(liga_spanish$dribbling, liga_spanish$acceleration)

##Rode o k-means com 4 centróides##

# Plot every column of liga_spanish except the first one (name).
plot(liga_spanish[-1])

# Fix the seed before fitting so the k-means result is reproducible, then
# split the players into 4 clusters using the same non-name columns.
set.seed(2)
modelo.fifa <- kmeans(liga_spanish[-1], centers = 4)

# Same scatter, with each point coloured by its assigned cluster.
plot(liga_spanish[-1], col = modelo.fifa$cluster, pch = 21, cex = 1)

# Overlay the cluster centres as filled triangles (pch = 24): fill colours
# run 1..4 and border colours run 4..1.
points(
  modelo.fifa$centers,
  col = 4:1,
  bg = 1:4,
  pch = 24,
  cex = 1,
  lwd = 1
)

##Quais são os jogadores que aparecem no gráfico?##

# Keep only the rows whose league is "Spanish Primera División", this time
# with three numeric attributes (dribbling, acceleration, long_passing)
# next to the player name; store them in liga_spanish2.
liga_spanish2 <- fifa_game_2 %>%
  filter(league == "Spanish Primera División") %>%
  select(name, dribbling, acceleration, long_passing)

# Plot every column except the first one (name).
plot(liga_spanish2[, -1])

# Fix the seed so the k-means result is reproducible.
set.seed(2)
# Fit on liga_spanish2, the table built and plotted in this section. The
# model was previously fitted on liga_spanish (the two-attribute table from
# the section above), so long_passing never influenced the clusters that
# are used to colour this plot.
modelo.fifa <- kmeans(liga_spanish2[, -1], centers = 4)

# Same plot, with each point coloured by its assigned cluster.
plot(liga_spanish2[, -1], col = modelo.fifa$cluster, pch = 21, cex = 1)

# NOTE(review): with three numeric columns, plot() on a data frame should
# produce a scatterplot matrix, so this single points() overlay of the
# centres presumably does not line up with any one panel - confirm the
# intended display.
points(modelo.fifa$centers, col = 4:1, bg = 1:4, pch = 24, cex = 1, lwd = 1)

# Plotly scatter of dribbling (x) against acceleration (y) drawn as
# markers; text = ~name attaches each player's name to its marker.
plot_ly(
  data = liga_spanish2,
  x = ~dribbling,
  y = ~acceleration,
  text = ~name,
  type = "scatter",
  mode = "markers"
)

##Paleta de Cores##

# Three named base colours, drawn as large filled dots, one per colour.
colorPallete1 <- c("red", "yellow", "green")
plot(x = seq_along(colorPallete1), col = colorPallete1, pch = 19, cex = 10)

# Interpolate the three base colours into a 30-step gradient and draw it
# the same way.
gradiente1 <- colorRampPalette(colorPallete1)(30)
plot(x = seq_along(gradiente1), col = gradiente1, pch = 19, cex = 10)

# Three base colours given as hex codes, drawn as large filled dots.
colorPallete2 <- c("#f8696b", "#ffeb84", "#63be7b")
plot(x = seq_along(colorPallete2), col = colorPallete2, pch = 19, cex = 10)

# 30-step gradient interpolated between those hex colours.
gradiente2 <- colorRampPalette(colorPallete2)(30)
plot(x = seq_along(gradiente2), col = gradiente2, pch = 19, cex = 10)

# Five named base colours, drawn as large filled dots.
cores <- c("red", "orange", "yellow", "green", "blue")
plot(x = seq_along(cores), col = cores, pch = 19, cex = 10)

# 30-step gradient interpolated across the five base colours.
gradiente3 <- colorRampPalette(cores)(30)
plot(x = seq_along(gradiente3), col = gradiente3, pch = 19, cex = 10)